# The purpose of this script is to investigate the bot results from 02b.  It generates density plots by country (saved as BotHistogram_<country>.jpeg) as well as a table of likely bots by city.
#
# For Supplementary Materials, creates Table A13.
###########
#
# GLOBALS
#
###########
#setwd('<path/to/Replication/>')


library(dplyr)
library(stargazer)
library(ggplot2)


###########
#
# DATA
#
###########
# Load the merged classifier output; keep strings as character so the
# clean-up steps below operate on plain text.
data <- read.csv(
	'./Data/02_processedData/c2_DonghyeonAlexmerged_classifiers_shortSpain.csv',
	stringsAsFactors=FALSE
)

# Country values carry a b'...' wrapper: drop the "b'" opener first, then any
# remaining single quotes.
data$country <- gsub("'", "", gsub("b'", "", data$country))

# Scores recorded as "User_no_longer_available" become NA, after which the
# column is converted to numeric.
data$bot_cap_universal <- as.numeric(replace(
	data$bot_cap_universal,
	which(data$bot_cap_universal == "User_no_longer_available"),
	NA
))

###########
#
# WORK
#
###########

### Density plot
# Writes one JPEG per country containing the kernel density (bandwidth .01)
# of bot_cap_universal, with the x-axis fixed to [0, 1].
countries <- unique(data$country)

# Iterate over the values directly: 1:length(countries) would run over
# c(1, 0) if there were no countries at all.
for(country in countries){
	# which() discards NA comparisons, so rows with a missing country are not
	# pulled in as all-NA rows (and an NA country selects nothing).
	scores <- data$bot_cap_universal[which(data$country == country)]
	scores <- scores[!is.na(scores)]

	# A density cannot be estimated from zero observations; skip the country
	# rather than abort the script with a graphics device still open.
	if(length(scores) == 0){
		warning("No bot scores available for country '", country, "'; skipping density plot.", call.=FALSE)
		next
	}

	jpeg(paste0('./Figures/BotHistogram_', country, '.jpeg'))
	# finally= guarantees the device is closed even if plotting fails.
	tryCatch(
		#hist(scores, breaks=100, xlim=c(0,1), xlab='Botometer Complete Automation Probability', density=TRUE, main='')
		plot(density(scores, bw=.01), xlim=c(0,1), xlab='Botometer Complete Automation Probability', main=""),
		finally = dev.off()
	)
}



### Table by city
# Cities retained in the output table.
thesecities <- c("Barcelona","Ciutat Vella","Girona",  "Granera",  "Granollers",  "Lleida",  "Mataró",   "Reus",     "Sabadell", "Sant Cugat del Vallès",      "Sant Feliu de Pallerols",    "Sant Salvador de Guardiola", "Tarragona","Terrassa", "Central", "Kowloon", "Seoul", "Kimhae",  "Lahore",   "Caracas",  "Valencia","Caucagua", "Boca del Rio","Maracaibo")

# Per-city summary over all rows of `data`: mean, max and standard deviation
# of bot_cap_universal, plus the mean of the existing `bot` column.
temp <- data.frame(
	data %>%
		group_by(city_use) %>%
		summarize(
			mean_bot = mean(bot_cap_universal, na.rm=TRUE),
			max_bot = max(bot_cap_universal, na.rm=TRUE),
			sd_bot = sd(bot_cap_universal, na.rm=TRUE),
			percent_bot_tweets = mean(bot, na.rm=TRUE)
		)
)

# Restrict the summary to the cities listed above.
temp <- temp[is.element(temp$city_use, thesecities),]

### Percent bot accounts
# Keep the first row seen for each user_id so every account counts once.
uniques <- data[!duplicated(data$user_id),]

# NOTE(review): `threshold` is not assigned anywhere in this script. Fail with
# a clear message instead of "object 'threshold' not found"; the intended
# cutoff value must be confirmed and set before this section runs.
if(!exists("threshold", mode="numeric")){
	stop("'threshold' (the bot_cap_universal cutoff used to flag an account as a bot) is not defined; set it before running this section.", call.=FALSE)
}

# 1 when the account's score meets the cutoff, 0 otherwise; NA scores stay NA.
uniques$bot <- ifelse(uniques$bot_cap_universal >= threshold, 1, 0)

# Share of flagged accounts per city.
temp2 <- data.frame(uniques %>% group_by(city_use) %>% summarize(percent_bot_accounts=mean(bot, na.rm=TRUE)))

# merge() has no on.x/on.y arguments (they were silently absorbed by `...`
# and the join only worked through the default `by`); name the key explicitly.
temp <- merge(temp, temp2, by='city_use')


# Export the table sorted by the share of bot tweets, highest first.
stargazer(temp[order(temp$percent_bot_tweets, decreasing=TRUE),], summary=FALSE, out='./Tables/bot_table_byCity.tex', rownames=FALSE)
